
* Stata code to replicate table and figures in Michael A. Clemens and Lant Pritchett, "The New Economic Case for Migration Restrictions: An Assessment"

* Session setup: pin the Stata version, reset memory, move to the working
* directory, start the log, and load the ACS extract used throughout.
version 15.1
clear all
set more off


** Change the following line to the filepath for your working directory

global working = "XXXXXXXXXXX"


** Open and prepare IPUMS-USA: American Community Survey 2008-2012 5-Year sample

cd "$working"

* Close any log left open by an earlier (possibly aborted) run. -clear all-
* does not close logs, and -log using- stops with an error if one is open.
* -capture- swallows the error raised when no log is open.
capture log close
log using output.log, replace

use "acs20082012withpuma_reduced.dta", clear

* Analysis sample: keep ages 20 through 65 (inclusive). The restriction is
* applied first; the row-wise variables below do not depend on it.
keep if inrange(age, 20, 65)

* Covariates for the earnings regressions.
generate female = (sex == 2)
generate linc = ln(incearn)              // missing when incearn <= 0
* Education dummies built from the educ code; hs (educ == 6) is created but
* left out of the regressions, so it acts as the omitted category there.
* NOTE(review): the meaning of each educ code comes from the IPUMS codebook,
* which is not visible here -- confirm against the extract's documentation.
generate hs = (educ == 6)
generate lhs = (educ <= 5)
generate sc = inlist(educ, 7, 8)
generate univ = (educ >= 10 & educ < 99)

* Five-year bins for age and for yrsusa1; each bin is labelled by its lower bound.
egen agegroup = cut(age), at(5(5)90)
egen yrsusagroup = cut(yrsusa1), at(0(5)75)

* U.S.-born indicator: bpl codes 0 through 56.
generate born_us = inrange(bpl, 0, 56)

* Select origin countries to analyze.
* One indicator born_<tag> is created per detailed birthplace code (bpld).
* The two lists are parallel: the k-th tag goes with the k-th code.
* NOTE(review): tags look like ISO-3 country abbreviations; the code-to-country
* mapping is taken on trust from the IPUMS bpld codebook -- confirm.
local origin_tags  "eth hti guy nic bgd som lbr npl mex hnd gha"
local origin_codes "60044 26020 30040 21060 52110 60053 60027 52400 20000 21050 60023"
local n_origins : word count `origin_tags'
forvalues k = 1/`n_origins' {
	local tag  : word `k' of `origin_tags'
	local code : word `k' of `origin_codes'
	generate born_`tag' = bpld==`code'
}

* Table 1 regressions: for each origin country, regress log earnings on age
* bins, education, sex, a foreign-born dummy and a quadratic in yrsusa1,
* using U.S.-born plus that country's immigrants. Derived statistics and group
* sizes are attached to the estimates, which are stored as est_<country>.
* Requires the estout package (estadd / esttab).
local countries "bgd gha guy hti lbr mex npl nic som"
foreach country of local countries {
	gen for_born = born_`country'   // keeps variable name same, eases making table
	reg linc ib30.agegroup lhs sc univ female for_born yrsusa1 c.yrsusa1#c.yrsusa1 [pw=perwt] if (born_us | for_born) 	// base group: 30yr old male, high school.

	* a = ln(2) / t*, where t* is the "+" root of
	*     b2*t^2 + b1*t + for_born/2 = 0
	* (b1, b2 = linear and squared yrsusa1 coefficients), i.e. the value of
	* yrsusa1 at which the quadratic offsets half of the for_born coefficient.
	* nlcom supplies the delta-method variance; it leaves e() untouched.
	nlcom (-1*ln(.5))/((-1*_b[yrsusa1] + (((_b[yrsusa1]^2) - (4*(_b[c.yrsusa1#c.yrsusa1])*(_b[for_born]/2)))^(1/2)))/(2*_b[c.yrsusa1#c.yrsusa1]))
	mat x1 = r(b)
	scalar a = x1[1,1]
	estadd scalar a
	mat x2 = r(V)
	scalar ase = sqrt(x2[1,1])
	estadd scalar ase

	* d = 1 - exp(for_born + 5*b1 + 25*b2): one minus the exponentiated
	* foreign-born log-earnings term evaluated at yrsusa1 = 5.
	nlcom 1-exp(_b[for_born]+5*_b[yrsusa1]+(5^2)*_b[c.yrsusa1#c.yrsusa1]) 
	mat x3 = r(b)
	scalar d = x3[1,1]
	estadd scalar d
	mat x4 = r(V)
	scalar dse = sqrt(x4[1,1])
	estadd scalar dse

	* Group sizes within the estimation sample (summarize is r-class, so
	* e(sample) from the regression is still available).
	quietly summ linc if born_us & e(sample)
	sca numus = r(N)    // number of observations for U.S.-born
	estadd scalar numus
	quietly summ linc if for_born & e(sample)
	sca numfor = r(N)    // number of observations for foreign-born
	estadd scalar numfor 

	* Store only after every scalar has been added, so the stored set is
	* complete without relying on estadd updating an already-stored copy.
	est store est_`country'
	drop for_born
}


** CREATE TABLE 1 

* Write the nine stored country regressions to tab1.tex (LaTeX, booktabs).
* Only the constant, the foreign-born dummy and the yrsusa1 quadratic are
* shown, followed by the added scalars (group sizes, d, a and their SEs).
esttab est_bgd est_gha est_guy est_hti est_lbr est_mex est_npl est_nic est_som ///
		using tab1.tex, ///
		replace booktabs label nostar se ///
		page(dcolumn,rotating) ///
		alignment(D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5} D{.}{.}{5}) ///
		mtitles(Bangladesh Ghana Guyana Haiti Liberia Mexico Nepal Nicaragua Somalia) ///
		keep(_cons for_born yrsusa1 c.yrsusa1#c.yrsusa1) ///
		order(_cons for_born yrsusa1 c.yrsusa1#c.yrsusa1) ///
		scalars(numus numfor d dse a ase)

* NOTE THAT THE LAST TWO ROWS OF TABLE 1 ARE CALCULATED IN A SEPARATE EXCEL FILE, INCLUDED IN THIS PACKAGE

		 

** CREATE FIGURE 5

* Figure 5: for Ghana-tagged (gha) and Nicaragua-tagged (nic) immigrants, plot
* the base-group earnings level against the level implied for immigrants in
* each 5-year yrsusa1 bin, and export one PNG per country.
foreach ctry in gha nic {
	* One indicator per 5-year bin of years in the U.S. (bins 0, 5, ..., 75).
	forvalues x = 0(5)75 {
		generate `ctry'`x' = born_`ctry' & yrsusagroup == `x'
	}

	* Only bins 0-45 enter the regression; immigrants in higher bins are
	* therefore part of the reference group together with the U.S.-born.
	regress linc ib30.agegroup lhs sc univ female `ctry'0-`ctry'45 [pw=perwt] if (born_us | born_`ctry')

	* Base-group earnings in levels (exponentiated constant).
	scalar intercept = exp(_b[_cons])

	* Accumulate one row per bin: (bin, base level, immigrant level, lower, upper).
	* The first row of zeros is a placeholder and is removed after the loop.
	matrix results = (0,0,0,0,0)
	forvalues i = 0(5)45 {
		* NOTE(review): the lines below treat r(estimate) and r(se) as already
		* being on the exponentiated scale after -lincom, eform- -- confirm for
		* the Stata version in use.
		lincom `ctry'`i', eform
		scalar coeff = r(estimate)*intercept
		scalar ll = (r(estimate) - 1.96*r(se))*intercept
		scalar ul = (r(estimate) + 1.96*r(se))*intercept
		matrix newresult = (`i', intercept, coeff, ll, ul)
		matrix results = results \ newresult
	}
	matrix results = results[2...,1...]       // drop the placeholder first row
	matrix colnames results = x b1 b2 blo bhi

	* Copy the matrix into variables x b1 b2 blo bhi so it can be graphed.
	svmat results, names(col)

	* Unused alternative kept from the original: note("Male, age 30-34, with high school degree", size(vsmall))
	graph twoway line b1 b2 blo bhi x, ///
		scheme(s1manual) ///
		lpattern(solid solid shortdash shortdash) ///
		lcolor(gs9 black black black) ///
		plotregion(lcolor(none)) ///
		ylabel(0(10000)30000, format(%5.0fc)) ///
		ytitle("Earned income/year, US$", margin(medium)) ///
		xtitle("Years since immigration", margin(medium)) ///
		legend(label(1 "U.S. worker") label(2 "Foreign worker") order(1 2) region(lcolor(none))) ///
		yscale(range(0 38000))
	graph export graph`ctry'.png, width(1600) replace

	* Remove the plotting variables so the next country can recreate them.
	drop x b1 b2 blo bhi
}

	

** CREATE FIGURE 7

* Figure 7: PUMA-level scatter of mean earned income against the foreign-born
* share, with a local-polynomial fit (and +/- 1.96 SE band) and a reference
* curve, exported to puma.png. Reloads the extract, so earlier edits are discarded.
use "acs20082012withpuma_reduced.dta", clear

* Person-level inputs for the weighted aggregation.
generate pop = 1
generate born_us = bpl>=0 & bpl<=56
generate foreign = !born_us

* Weighted totals (population, foreign-born) and weighted mean earnings per PUMA.
* NOTE(review): this groups on puma alone, which assumes the puma variable in
* this extract is unique nationwide (IPUMS PUMA codes are usually unique only
* within state) -- confirm against the extract.
collapse (sum) pop foreign (mean) incearn [pw=perwt], by(puma)
generate for_frac = foreign/pop

* Local-polynomial smooth evaluated on a 200-point grid, with a pointwise band.
lpoly incearn for_frac, generate(x_s y_s) se(se_s) bw(.1) n(200)
generate hi = y_s + 1.96*se_s
generate low = y_s - 1.96*se_s

* Reference curve on a 1000-point grid over [0, 1]; it is the line labelled
* "c = 0.5" in the figure.
range x 0 1 1000
generate y = 20500*(1 - ((0.8 * 0.4 * x)/(1 - (.5) * x)))

graph twoway ///
	(scatter incearn for_frac, msymbol(o) msize(vsmall) mcolor(gs9)) ///
	(line y_s low hi x_s, lcolor(black gs8 gs8) lpattern(dash solid solid)) ///
	(line y x, lcolor(blue)), ///
	scheme(s1manual) ///
	plotregion(lcolor(none)) ///
	xlabel(0(.2)1) ///
	legend(off) ///
	aspect(.7) ///
	ysc(log) ///
	ylabel(5000 10000 20000 40000 80000, format(%6.0fc)) ///
	ytitle("Earned income/year, US$ (log scale)", margin(medium)) ///
	xlabel(,format(%03.1f)) ///
	xtitle("Fraction foreign-born", margin(medium)) ///
	text(11000 .94 "{it: c} = 0.5", color(blue)) ///
	text(20000 .8 "Nonparametric fit", color(gs8))

graph export puma.png, width(3200) replace

log close

** END
